% scribe: Tye Lidman
% lastupdate: Oct. 2, 2005
% lecture: 6
% title: Convergence of random variables
% references: Durrett, section 1.5
% keywords: weak law of large numbers, WLLN, pointwise convergence, almost sure convergence, Lp convergence, weak convergence, convergence in distribution, Stout's almost sure convergence, i.o., ev., infinitely often, eventually
% end
\documentclass[12pt,letterpaper]{article}
\include{macros}
\newcommand{\convpoint}{\stackrel{p.w.}{\longrightarrow}}
\newcommand{\conv}{\rightarrow}
\newtheorem{fact}[theorem]{Fact}

\begin{document}
\lecture{6}{Convergence of random variables}{Tye Lidman}{tlid@berkeley.edu}

(These notes are a revision of the work of Jin Kim, 2002.)

\section{Convergence of random variables}
% keywords: weak law of large numbers, WLLN, pointwise convergence, almost sure convergence, Lp convergence, weak convergence, convergence in distribution, Stout's almost sure convergence
% end

Our first significant example is the \emph{weak law of large numbers (WLLN)}. To state it we need a general notion of convergence in probability.

\begin{definition}
Given a sequence of r.v.'s $X_n$ defined on a probability space $(\Omega ,\mathcal{F} ,\P)$, we say $X_n$ \emph{converges in probability} to $X$, written $X_n \pcv X$, if $X$ is a r.v.\ on $(\Omega ,\mathcal{F} )$ and for all $\epsilon >0$,
%
$$\lim_{n \to \infty} \P(\lvert X_n-X\rvert >\epsilon ) = 0\mbox{.}$$
\end{definition}

\begin{theorem}[Weak Law of Large Numbers]
Let $X,X_1,X_2,\dotsc$ be i.i.d.\ with $\E\lvert X\rvert < \infty$. Then
$$\frac{1}{n}\sum_{i=1}^n X_i \pcv \E(X)\mbox{.}$$
\end{theorem}

Other notions of convergence of r.v.'s:

{\bf Simplest: } the convergence $\conv$ discussed in previous lectures. \par
{\bf Pointwise Convergence: } $X_n \convpoint X$ if $X_n(\omega ) \conv X(\omega )$ for all $\omega \in \Omega$. This is a very strong notion: too strong for many purposes. \par
{\bf Almost Sure Convergence: } We say $X_n \ascv X$ if $X_n(\omega ) \conv X(\omega )$ for all $\omega \not\in N$ for some $N$ with $\P(N)=0$, or equivalently $\P(\omega : X_n(\omega ) \conv X(\omega ) \mbox{ as } n \conv \infty)=1$. \par
{\bf Convergence in $L^p$ $(p\geq 1)$: } We say $X_n \lpcv X$ if $\lVert X_n-X \rVert _p \conv 0$, i.e.\ $\lim_{n \rightarrow \infty} \E\lvert X_n-X\rvert ^p = 0$. \par
{\bf Convergence in Distribution: } (Not really a notion of convergence of r.v.'s.) This is a notion of convergence of probability distributions on $\mathbb{R}$ (or a more general space). We say $X_n \dcv X$ if $\P(X_n\leq x)\conv \P(X\leq x)$ for all $x$ at which the limit distribution function $x \mapsto \P(X\leq x)$ is continuous. This weak convergence appears in the central limit theorem. \par

\begin{fact}
(See text.) $X_n \dcv X$ $\iff$ $\E f(X_n) \longrightarrow \E f(X)$ for all bounded continuous functions $f$.
\end{fact}

{\bf Properties in Common for } $\pcv , \convpoint , \ascv , \lpcv$: \par
a) $X_n \conv X$, $Y_n \conv Y \Longrightarrow X_n+Y_n \conv X+Y$, $X_nY_n \conv XY$. \par
b) $X_n \conv X \Longleftrightarrow (X_n-X) \conv 0$ (a useful and common reduction). \par
c) For all of $\pcv , \ascv ,$ and $\lpcv$ the limit $X$ is unique up to a.s.\ equivalence. \par
d) Cauchy sequences are convergent (completeness). (One needs a metric to metrize $\pcv$, but that is easily provided; see text.)
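Not every implication among these notions holds. For instance, the following standard example (included here only for illustration) shows that neither pointwise nor almost sure convergence, nor convergence in probability, implies convergence in $L^p$.

\begin{example}[Tall blip]
On $[0,1]$ with Lebesgue measure, let $X_n = n\,1_{(0,1/n)}$. Then $X_n(\omega ) \conv 0$ for every $\omega$, so $X_n \convpoint 0$, $X_n \ascv 0$ and $X_n \pcv 0$; but $\E\lvert X_n \rvert ^p = n^p \cdot \tfrac{1}{n} = n^{p-1} \ge 1$ for every $p \ge 1$, so $X_n$ does not converge to $0$ in any $L^p$.
\end{example}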
\begin{theorem}
The following implications hold among the types of convergence.
\begin{center}
\setlength{\unitlength}{1cm}
\begin{picture}(10,6.5)
\put(1.5,5){\framebox(3,1){$X_n \ascv X$}}
\put(6.5,5){\framebox(3,1){$X_n \lpcv X$}}
\put(4,2.5){\framebox(3,1){$X_n \pcv X$}}
\put(4,0){\framebox(3,1){$X_n \dcv X$}}
%leftarrow
\put(4.5,3.7){\line(-1,1){1.3}}
\put(4.2,3.6){\line(-1,1){1.35}}
\put(4.45,3.5){\line(-3,1){.6}}
\put(4.45,3.5){\line(0,1){.6}}
%rightarrow
\put(6.35,3.7){\line(1,1){1.3}}
\put(6.65,3.6){\line(1,1){1.35}}
\put(6.4,3.5){\line(0,1){.6}}
\put(6.4,3.5){\line(3,1){.6}}
\put(7.5, 4){ $(\ast)$}
%centerarrow
\put(5.4, 2.5){\line(0,-1){1.4}}
\put(5.6, 2.5){\line(0,-1){1.4}}
\put(5.5,1){\line(-1,1){.4}}
\put(5.5,1){\line(1,1){.4}}
\put(7,2){ $(\ast \ast)$}
\end{picture}
\end{center}
\end{theorem}

\begin{proof}
$(\ast)$ can be proved by Chebyshev's inequality (usually with $p=2$):
\[ \P( | X_n - X | > \epsilon) \le \frac{\E (| X_n -X | ^p)}{\epsilon^p} \mbox{,} \]
so if $\E\lvert X_n - X\rvert ^p \conv 0$ then $\P(\lvert X_n - X\rvert > \epsilon ) \conv 0$ for every $\epsilon > 0$.
$(\ast\ast)$ is proved in the text.
\end{proof}

\begin{example}[Moving blip]
(An example showing that almost sure convergence is a strictly stronger condition than convergence in probability.) On $[0,1]$ with Lebesgue measure, define $X_n = 1_{(x_n,\,x_{n+1})}$, where the interval is taken modulo $1$ and $x_n$ is any sequence with $x_{n+1}-x_n \conv 0$ and $x_n \uparrow \infty$ (e.g.\ $x_n = 1 + \frac{1}{2} + \dotsb + \frac{1}{n}$ or $x_n = \log n$). Then $\P(\lvert X_n \rvert > \epsilon) = x_{n+1} - x_n \conv 0$ for all $0<\epsilon<1$, so $X_n \pcv 0$; but $X_n$ does not converge almost surely to $0$ (see the remark at the end of these notes).
\end{example}

\begin{example}
Suppose that $X_1, X_2, \ldots$ are r.v.'s that have mean $0$, have finite variances, and are uncorrelated. Let $S_n = X_1 + \dotsb + X_n$. If $\sum_{k=1}^\infty \E(X_k^2)<\infty$, then $S_n$ converges in $L^2$ to a limit $S_{\infty}$, hence $S_n \pcv S_{\infty}$, i.e.\ $\lim_{n\rightarrow\infty} \P(\lvert S_n - S_{\infty} \rvert > \epsilon ) = 0$ for all $\epsilon > 0$.
\end{example}

\begin{proof}
Look at the Cauchy criterion. Take $m>n$. Since the $X_k$ have mean $0$ and are uncorrelated,
%
$$\E(S_m-S_n)^2=\E\left(\sum_{k=n+1}^m X_k \right)^2 = \negmedspace\sum_{k=n+1}^m \negmedspace \E(X_k^2) \rightarrow 0$$
%
as $m,n \rightarrow \infty$, because $\sum_{k=1}^\infty \E(X_k^2)<\infty$. Hence $(S_n)$ is Cauchy in $L^2$, and by completeness of $L^2$ it converges in $L^2$ to a limit $S_\infty$; convergence in probability then follows from $(\ast)$.
\end{proof}

\begin{fact}
If the $X_n$ are independent (or, more generally, martingale differences), then $S_n\ascv S_\infty$.
\end{fact}

The proof of this fact is deferred.

\begin{fact}[See Stout, \emph{Almost Sure Convergence}]
There are examples of uncorrelated sequences with $\sum_n \E(X_n^2)<\infty$ for which a.s.\ convergence of $S_n$ fails.
\end{fact}

\section{Preliminaries for Study of a.s. Convergence}
% keywords: i.o., ev., infinitely often, eventually
% end

\begin{definition}
Let $q_n$ be a statement, true or false for each $n$. We say $q_n$ holds \emph{infinitely often}, written $(q_n \mbox{ i.o.})$, if for all $n$ there is $m \ge n$ such that $q_m$ is true, and $q_n$ holds \emph{eventually}, written $(q_n \mbox{ ev.})$, if there exists $n$ such that for all $m \ge n$, $q_m$ is true. Now let $q_n$ depend on $\omega$, giving events
%
$$A_n = \{\omega:q_n(\omega)\mbox{ is true}\}\mbox{.}$$
%
We then have new events,
%
$$\{A_n \mbox{ i.o.}\} = \{\omega : \omega \in A_n \mbox{ i.o.}\} = \bigcap_n \bigcup_{m \ge n} \, A_m\mbox{,}$$
%
and
%
$$\{A_n \mbox{ ev.}\} = \{\omega : \omega \in A_n \mbox{ ev.}\} = \bigcup_n \bigcap_{m \ge n} \, A_m\mbox{.}$$
\end{definition}

In the language of analysis, $1_{\{A_n \mbox{ i.o.}\}} = \limsup_n 1_{A_n} = \lim_{n \rightarrow \infty}\sup_{m\ge n} 1_{A_m}$ and $1_{\{A_n \mbox{ ev.}\}} = \liminf_n 1_{A_n} = \lim_{n \rightarrow \infty}\inf_{m\ge n} 1_{A_m}$.
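For concreteness, here is a small worked instance of these definitions (the events are chosen arbitrarily, only for illustration).

\begin{example}
On $[0,1]$ with Lebesgue measure, let $A_n = [0,\frac{1}{2})$ for $n$ even and $A_n = [0,\frac{1}{4})$ for $n$ odd. Then for every $n$, $\bigcup_{m \ge n} A_m = [0,\frac{1}{2})$ and $\bigcap_{m \ge n} A_m = [0,\frac{1}{4})$, so
$$\{A_n \mbox{ i.o.}\} = [0,\tfrac{1}{2})\mbox{,} \qquad \{A_n \mbox{ ev.}\} = [0,\tfrac{1}{4})\mbox{.}$$
An $\omega \in [\frac{1}{4},\frac{1}{2})$ lies in $A_n$ for every even $n$ but for no odd $n$: it is in the $A_n$ infinitely often but not eventually.
\end{example}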
Given a sequence of events $A_n$, for each $\omega \in \Omega$ we may consider $1_{A_n}(\omega)$ as a function of $n$, i.e.\ $\omega \longmapsto (1,0,0,1,\dotsc)$, say.

{\bf Notice (de Morgan)} that $\{A_n \mbox{ i.o.}\}^c = \{A_n^c \mbox{ ev.}\}$ and $\{A_n \mbox{ ev.}\}^c = \{A_n^c \mbox{ i.o.}\}$.

{\bf Observe } that $X_n \ascv X \Longleftrightarrow \forall \epsilon > 0 \mbox{, } \P(\lvert X_n - X \rvert > \epsilon \, \mbox{ i.o.}) = 0$.

{\bf Argue this} using facts about convergence of real numbers: for fixed $\omega$, $X_n(\omega ) \conv X(\omega ) \Longleftrightarrow \forall \epsilon > 0\mbox{, } \lvert X_n(\omega ) - X(\omega ) \rvert \le \epsilon \mbox{ ev.}$, so
%
\begin{eqnarray*}
X_n \ascv X & \Longleftrightarrow & \forall \epsilon > 0\mbox{, } \P(\lvert X_n-X \rvert \le \epsilon \mbox{ ev.}) = 1 \\
& \Longleftrightarrow & \forall \epsilon > 0\mbox{, } \P(\lvert X_n-X \rvert > \epsilon \mbox{ i.o.}) = 0.
\end{eqnarray*}
%
(For the first equivalence, note that it suffices to check countably many $\epsilon$, say $\epsilon = 1/k$ for $k = 1, 2, \dotsc$, since a countable intersection of probability-one events still has probability one.)
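To see the criterion in action, return to the moving blip example above (a sketch; the details make a good exercise). For $0 < \epsilon < 1$ we have $\{\lvert X_n \rvert > \epsilon\} = (x_n, x_{n+1})$ taken modulo $1$. Since $x_n \uparrow \infty$, these intervals sweep around the circle $[0,1)$ infinitely many times, so every $\omega$ outside the countable set of interval endpoints lies in $(x_n, x_{n+1})$ modulo $1$ for infinitely many $n$. Hence $\P(\lvert X_n \rvert > \epsilon \, \mbox{ i.o.}) = 1 \neq 0$, and by the observation above $X_n$ does not converge almost surely to $0$, even though $X_n \pcv 0$.

\end{document}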